Set up

# NC 12-2-24: the combining code now lives in a separate combining script;
# this file only loads the resulting workbook.
# Earlier input file, kept for reference:
#   combined_all <- read.xlsx(here::here("graphs/combined/combined_all.xlsx"))
combined_all <- read.xlsx(
  here::here("graphs/combined/master_aggregated_CATE.xlsx")
)
  # Axis maximums (used for both x and y) and axis breaks, one row per
  # dataset/outcome pair. `breaks` and `breaks_agg` are list-columns, so each
  # row carries its own vector of break positions.
  # The *_agg columns hold a second set of limits/breaks (presumably for the
  # aggregated plots -- confirm against the plotting functions).
  # Update this table when adding new datasets/outcomes.
  # TODO: may need to change these depending on future simulation results
  scenario_maximums <- rbind(
    data.frame(
      dataset = "asap",
      outcome = "X16BTMCRET",
      max = 1.2,
      max_bias = 1.2,
      max_se = 1.2,
      breaks = I(list(c(0, 0.2, 0.4, 0.6, 0.8, 1))),
      max_agg = 0.35,
      max_bias_agg = 0.35,
      max_se_agg = 0.35,
      breaks_agg = I(list(c(0, 0.1, 0.2, 0.3)))
    ),
    data.frame(
      dataset = "asap",
      outcome = "C16BMVDEG",
      max = 1,
      max_bias = 1,
      max_se = 1,
      breaks = I(list(c(0, 0.2, 0.4, 0.6, 0.8))),
      max_agg = 1,
      max_bias_agg = 1,
      max_se_agg = 1,
      breaks_agg = I(list(c(0, 0.2, 0.4, 0.6, 0.8)))
    )
  )
# `df` is a plain copy of the loaded table; `all_scenarios` is the copy that
# receives the extra `set_id` column below.
df <- combined_all
all_scenarios <- combined_all

# set_id joins dataset, outcome, covariate-set size and training-set size
# with "-" to give one label per row.
all_scenarios$set_id <- paste(
  df$dataset,
  df$outcome,
  df$cov_set_size,
  df$train_set_size,
  sep = "-"
)

# Distinct values of the `model` column, in order of first appearance.
ALL_MODELS <- unique(all_scenarios$model)

# Numeric shape code for each type-level label.
# The values look like R point-shape (pch) codes -- confirm against the
# scale that consumes this map.
TYPE_SHAPE_MAP <- c(
  "ATE"     = 20,
  "OLS S"   = 18,
  "INF"     = 2,
  "RF"      = 3,
  "CDML"    = 8,
  "LASSO"   = 10,
  "SL"      = 16,
  "XGBOOST" = 1,
  "BART"    = 0
)

# Colour for each type-level label (same keys, same order as TYPE_SHAPE_MAP).
TYPE_COLOR_MAP <- c(
  "ATE"     = "black",
  "OLS S"   = "black",
  "INF"     = "darkgrey",
  "RF"      = "#E69F00",
  "CDML"    = "#F0E442",
  "LASSO"   = "#009E73",
  "SL"      = "#D55E00",
  "XGBOOST" = "#CC79A7",
  "BART"    = "#0072B2"  # alternative considered: "#56B4E9"
)

# Labels listed in the colour legend: every key of TYPE_COLOR_MAP except the
# ones in the exclusion vector.
# NOTE(review): "LASSO INF" and "RF INF" are not keys of TYPE_COLOR_MAP, so
# only "ATE" and "OLS S" are actually removed and "INF" stays in the result.
# Confirm whether "INF" was meant to be excluded as well.
LEGEND_COLORS <- setdiff(
  names(TYPE_COLOR_MAP),
  c("LASSO INF", "RF INF", "ATE", "OLS S")
)

# For individual methods: one numeric shape code per method label,
# numbered 0..21 in the order listed.
SHAPE_MAP <- c(
  # baselines
  "ATE"           = 0,
  "OLS S"         = 1,
  # RF / CF / CDML
  "RF INF"        = 2,
  "RF T"          = 3,
  "RF MOM IPW"    = 4,
  "RF MOM DR"     = 5,
  "CF"            = 6,
  "CF LC"         = 7,
  "CDML"          = 8,
  # LASSO
  "LASSO INF"     = 9,
  "LASSO T"       = 10,
  "LASSO MOM IPW" = 11,
  "LASSO MOM DR"  = 12,
  "LASSO MCM"     = 13,
  "LASSO MCM EA"  = 14,
  "LASSO R"       = 15,
  # SL / XGBOOST / BART
  "SL T"          = 16,
  "SL S"          = 17,
  "XGBOOST S"     = 18,
  "XGBOOST R"     = 19,
  "BART T"        = 20,
  "BART S"        = 21
)

# Older type-level shape assignment (going by its name). Compared with
# TYPE_SHAPE_MAP, the codes for "ATE"/"OLS S" and for "XGBOOST"/"BART" are
# swapped here.
old_type_shape_map <- c(
  "ATE"     = 18,
  "OLS S"   = 20,
  "INF"     = 2,
  "RF"      = 3,
  "CDML"    = 8,
  "LASSO"   = 10,
  "SL"      = 16,
  "XGBOOST" = 0,
  "BART"    = 1
)
# Older per-method shape assignment (going by its name): one numeric shape
# code per method label, numbered 0..21 in the order listed.
old_shape_map <- c(
  "ATE"           = 0,
  "OLS S"         = 1,
  "RF INF"        = 2,
  "RF T"          = 3,
  "RF MOM IPW"    = 4,
  "RF MOM DR"     = 5,
  "CF"            = 6,
  "CF LC"         = 7,
  "CDML"          = 8,
  "LASSO INF"     = 9,
  "LASSO T"       = 10,
  "LASSO MOM IPW" = 11,
  "LASSO MOM DR"  = 12,
  "LASSO MCM"     = 13,
  "LASSO MCM EA"  = 14,
  "LASSO R"       = 15,
  "SL T"          = 16,
  "SL S"          = 17,
  "XGBOOST S"     = 18,
  "XGBOOST R"     = 19,
  "BART T"        = 20,
  "BART S"        = 21
)

Figures

An example of each kind of visualization

contour_onequeen (shown: asap_2_small_1000 for RF T queen)

# Single-queen contour example: "RF T" queen on the asap / X16BTMCRET
# scenario (train set size "1000", covariate set "small").
contour_onequeen(
  scen_dataset = "asap",
  scen_outcome = "X16BTMCRET",
  scen_train_set_size = "1000",
  scen_cov_set_size = "small",
  plotqueen = "RF T"
)
## Scale for shape is already present.
## Adding another scale for shape, which will replace the existing scale.

scenario_plot (shown: asap_2_small_1000)

# Scenario plot for asap / X16BTMCRET, train set size "1000", covariate
# set "small".
scenario_plot(
  scen_dataset = "asap",
  scen_outcome = "X16BTMCRET",
  scen_train_set_size = "1000",
  scen_cov_set_size = "small"
)

Stacked mini plots for ASAP outcome 2

# Only dataset and outcome are passed; the train-set-size / covariate-set
# arguments are left commented out.
stack_miniplots(
  scen_dataset = "asap",
  scen_outcome = "X16BTMCRET"
  # , scen_train_set_size = "2000"
  # , scen_cov_set_size = "small"
)
## Warning: Removed 1 row containing missing values or values outside the scale range
## (`geom_point()`).

## ATE and CDML queens only

# Two-queen plot for asap / X16BTMCRET with queens "ATE" and "CDML".
plot_two_queens(
  scen_dataset = "asap",
  scen_outcome = "X16BTMCRET",
  queen1 = "ATE",
  queen2 = "CDML"
)

continuous scenario_allqueens (shown: asap_2_small_1000)

# All-queens plot for asap / X16BTMCRET, train set size "1000", covariate
# set "small".
scenario_allqueens(
  scen_dataset = "asap",
  scen_outcome = "X16BTMCRET",
  scen_train_set_size = "1000",
  scen_cov_set_size = "small"
)
## Warning: Returning more (or less) than 1 row per `summarise()` group was deprecated in
## dplyr 1.1.0.
## ℹ Please use `reframe()` instead.
## ℹ When switching from `summarise()` to `reframe()`, remember that `reframe()`
##   always returns an ungrouped data frame and adjust accordingly.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.

## TODO: fix binary scenario_allqueens (shown: asap_1_small_2000)

# All-queens plot for asap / C16BMVDEG, train set size "2000", covariate
# set "small".
scenario_allqueens(
  scen_dataset = "asap",
  scen_outcome = "C16BMVDEG",
  scen_train_set_size = "2000",
  scen_cov_set_size = "small"
)

Percent poorly estimated

# Percent-poorly-estimated plot for asap / X16BTMCRET, train set size
# "1000", covariate set "small".
scenario_poorlyest(
  scen_dataset = "asap",
  scen_outcome = "X16BTMCRET",
  scen_train_set_size = "1000",
  scen_cov_set_size = "small"
)

Trail plot

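# Trail plot: train_set_size 1000 -> 5000 with cov_set_size held at "small"  ----
# NOTE(review): this header used to read "Moving to large covariate set plot",
# but the call below does not vary cov_set_size -- confirm which is intended.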


# Rows are filtered to cov_set_size == "small" (additional_filter_*), and the
# trail runs over train_set_size from 1000 to 5000 (trail_*).
scenario_trail(
  scen_dataset = "asap",
  scen_outcome = "X16BTMCRET",
  additional_filter_var = "cov_set_size",
  additional_filter_val = "small",
  trail_var = "train_set_size",
  trail_val_from = 1000,
  trail_val_to = 5000
)

Regression Plots

Coefficient Plot

# Build the coefficient summary for ca / Y18JBERNA_06, then plot it with the
# default options.
cs <- make_cs(
  all_scenarios,
  scen_dataset = "ca",
  scen_outcome = "Y18JBERNA_06"
)
coef_plot(cs)

Coefficient Plot, Model Only

# Same ca / Y18JBERNA_06 input as the full coefficient plot, restricted via
# plotOnly = "model".
cs <- make_cs(
  all_scenarios,
  scen_dataset = "ca",
  scen_outcome = "Y18JBERNA_06"
)
coef_plot(cs, plotOnly = "model")

Ex Factor Plot, CA outcome 1

# Build the c_coef table for ca / Y18JBERNA_06 and pass it to the ex-factor
# plot.
c_coef <- make_c_coef(
  all_scenarios,
  scen_dataset = "ca",
  scen_outcome = "Y18JBERNA_06"
)
ex_factor_plot(c_coef)

TO DO: not added yet

Animations

Others?